__author__ = 'Administrator'
from Preprocessing import *
import numpy


class Sentence(object):
    """One labelled sentence together with its vocabulary indices.

    Attributes are plain and are freely reassigned by callers:
        context   -- value stored as given (defaults to an empty string).
        label     -- value stored as given (defaults to -1).
        words_idx -- list of word indices (defaults to a new empty list).
    """

    def __init__(self, context=str(), label=-1, words_idx=None):
        self.context = context
        self.label = label
        # A default of ``list()`` in the signature is created only once and
        # would be shared by every instance; build a fresh list per instance.
        self.words_idx = list() if words_idx is None else words_idx


class Datasets(object):
    """Reads labelled sentences from a text file into ``Sentence`` objects.

    Words are mapped to indices through ``preprocessing.words_2_idx``.
    """

    def __init__(self,
                 preprocessing=None,
                 ):
        """Store the preprocessing object used for word-to-index lookups.

        preprocessing -- object exposing ``words_2_idx``; when omitted (or
                         None) a ``Preprocessing()`` is built here.
        """
        # Build the default lazily: a ``Preprocessing()`` call written in the
        # signature runs once when the class is defined (i.e. at import time)
        # and the resulting object is shared by every default instance.
        if preprocessing is None:
            preprocessing = Preprocessing()
        self.preprocessing = preprocessing

    def get_data_from_file(self, padding, filename="data//train.txt"):
        """Parse ``filename`` and return a list of ``Sentence`` objects.

        Each non-blank line is split on "@": the first field is converted
        with ``int`` and stored as a one-element numpy array in ``label``;
        every following field is looked up in ``words_2_idx``.  The index of
        "</s>" is repeated ``padding`` times before and after the word
        indices.  Blank lines are skipped.

        padding  -- number of "</s>" indices added on each side.
        filename -- path of the file to read.

        Raises ValueError when the first field of a line is not an integer,
        and propagates whatever ``words_2_idx[...]`` raises for a field (or
        "</s>") it does not contain.
        """
        # One pre-formatted argument: prints the same text under the
        # Python 2 print statement and when print is called as a function.
        print("%s %s" % ("get the data from file -----> ", filename))
        words_2_idx = self.preprocessing.words_2_idx
        # The padding run is identical for every line, so build it once; it
        # is only concatenated below, never mutated.
        pad = [words_2_idx["</s>"]] * padding
        sentences = list()
        # ``with`` closes the file even if a line fails to parse.
        with open(filename, 'r') as openfile:
            for each in openfile:
                line = each.lstrip().strip("\n")
                if not line:
                    # Nothing to parse; int("") would raise ValueError.
                    continue
                words = line.split("@")
                sentence = Sentence()
                sentence.label = numpy.array([int(words[0])])
                sentence.words_idx = (
                    pad + [words_2_idx[word] for word in words[1:]] + pad
                )
                sentences.append(sentence)
        return sentences

# data = Datasets()
# sentences = data.get_data_from_file(padding=0, filename="data//trec_train.txt")
#
# for sentence in sentences:
#     print sentence.label.shape, sentence.words_idx, data.preprocessing.WORDS[sentence.words_idx].shape
